package day02

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

/**
  * Computes UV (unique visitors) for a web access log.
  *
  * A visitor is identified by the first space-separated field of a log line
  * (the client IP). The job prints the number of distinct, non-empty values of
  * that field to standard output.
  *
  * Usage: `UV [inputPath]`. When no argument is supplied, the built-in default
  * log path is read.
  */
object UV {

  /** Input read when no path is passed on the command line. */
  private val DefaultInputPath: String =
    "file:///F:\\weblog\\input\\access.log.20181101.dat"

  /**
    * Entry point.
    *
    * @param args optional; `args(0)` overrides the input path
    */
  def main(args: Array[String]): Unit = {
    val inputPath: String = args.headOption.getOrElse(DefaultInputPath)

    // Initialise Spark. The app name matches this job (it was previously "PV").
    val sparkConf: SparkConf = new SparkConf().setAppName("UV").setMaster("local[2]")
    val sc: SparkContext = new SparkContext(sparkConf)

    try {
      sc.setLogLevel("WARN")

      // Load the access log, one record per line.
      val lines: RDD[String] = sc.textFile(inputPath)

      // Take everything before the first space as the IP. Unlike
      // `split(" ")(0)`, this cannot throw on a line consisting only of
      // spaces; blank or space-leading lines produce "" and are dropped so
      // they are not counted as a visitor.
      val ips: RDD[String] = lines
        .map(line => line.takeWhile(_ != ' '))
        .filter(_.nonEmpty)

      // Count how many distinct IPs appear in the log.
      val uniqueVisitors: Long = ips.distinct().count()
      println(uniqueVisitors)
    } finally {
      // Always release the context, even when the job fails.
      sc.stop()
    }
  }
}
